---
title: "Virus Count Analysis"
subtitle: "tidied by sean :)"
author: "Ramon Lorenzo"
date: today
format:
html:
toc: true
page-layout: full
embed-resources: true
code-fold: true
code-tools: true
link-external-newwindow: true
execute:
warning: false
from: markdown+emoji
editor_options:
chunk_output_type: console
---
```{r}
#| label: load-packages-data
library (tidyverse)
# devtools::install_github("vqv/ggbiplot")
library (ggbiplot)
library (plotly)
library (ggsci)
library (ggpubr)
library (pscl)
library (glmmTMB)
library (performance)
library (emmeans)
library (fitdistrplus)
virus_counts <- read_csv ("data/Virus count - R21.csv" ) |>
janitor:: clean_names (replace= janitor::: mu_to_u) |>
mutate (donor = factor (donor),
image = factor (image),
virions = as.numeric (as.character (virions)),
penetrating_virions = as.numeric (as.character (penetrating_virions)),
percent_of_penetrating_virions = as.numeric (as.character (percent_of_penetrating_virions)))
# making some tactical deletions-- no percent of penetrating greater than 100; no donor 1905; no NA virions
virus_counts <- virus_counts |>
filter (donor != "1905" ) |>
filter (percent_of_penetrating_virions < 101 ) |>
filter (! is.na (virions))
```
## distribution check
based on some distribution evaluation things, it looks like lognormal is gonna fit our data the best:
```{r}
plotdist (virus_counts$ virions, histo = TRUE , demp = TRUE )
descdist (virus_counts$ virions, discrete= FALSE , boot= 500 )
# i guess this is a mostly lognormal distribution??
fnb<- fitdist (virus_counts$ virions, "nbinom" )
fp<- fitdist (virus_counts$ virions, "pois" )
plot.legend <- c ( "nbinom" , "pois" )
denscomp (list (fnb,fp), legendtext = plot.legend)
qqcomp (list (fnb,fp), legendtext = plot.legend)
# based on qqplot-- poisson sucks
cdfcomp (list (fnb,fp), legendtext = plot.legend)
ppcomp (list (fnb,fp), legendtext = plot.legend)
# idk what a pp plot is either but im just gonna say poisson has not been killing it here
gofstat (list (fnb,fp))
```
## Initial Modeling
so let's make a lognormal model!
```{r}
initial_model <- lme4:: lmer (virions ~ condition* tissue+ (1 | donor), virus_counts)
plot (check_distribution (initial_model))
zinbm0<- glmmTMB (virions ~ condition* tissue+ (1 | donor),
family= "nbinom2" ,
ziformula= ~ 1 ,data = virus_counts)
zinbm1<- glmmTMB (virions ~ tissue* condition+ (1 | donor),
family= "nbinom2" ,
data = virus_counts)
zinbm2<- glmmTMB (virions ~ condition* tissue+ (1 | donor),
family= "nbinom2" ,
ziformula= ~ tissue,data = virus_counts)
anova (zinbm0,zinbm1,zinbm2)
model_performance (zinbm0)
check_model (zinbm0)
compare_performance (zinbm0,zinbm1,zinbm2)
```
Then we can compare EMMs of our best model, binom0 :
```{r}
t <- pairs (emmeans (zinbm0, ~ condition))
s <- pairs (emmeans (zinbm0, ~ tissue))
t.s <- pairs (emmeans (zinbm0, ~ tissue | condition))
s.t <- pairs (emmeans (zinbm0, ~ condition | tissue))
rbind (t.s, s.t,adjust= "fdr" )
check_model (zinbm0)
ggboxplot (virus_counts, x = "condition" ,
y = "virions" ,
combine = TRUE ,
color = "tissue" , palette = "npg" ,
ylab = "virions" ,
add = "jitter" , # Add dotplot
add.params = list (binwidth = 0.2 ))+ scale_y_log10 ()
```
## what's this MEAN?
look at some means of data:
```{r}
mean_virus_counts <- virus_counts %>%
group_by (condition, tissue, donor) %>%
summarise (mean_virions = mean (virions))
mean_model <- lme4:: lmer (log10 (mean_virions) ~ condition* tissue+ (1 | donor),
data = mean_virus_counts)
plot (check_distribution (mean_model))
t <- pairs (emmeans (mean_model, ~ condition))
s <- pairs (emmeans (mean_model, ~ tissue))
t.s <- pairs (emmeans (mean_model, ~ tissue | condition))
s.t <- pairs (emmeans (mean_model, ~ condition | tissue))
rbind (t.s, s.t,adjust= "fdr" )
check_model (mean_model)
qqnorm (resid (mean_model))
qqline (resid (mean_model))
hist (resid (mean_model))
plot (fitted (mean_model),resid (mean_model))
abline (h= 0 )
mean_virus_counts %>%
ggplot (aes (x= condition,y= mean_virions,color= tissue))+
geom_boxplot (outlier.shape = NA )+
geom_point (position = position_jitterdodge ())+
theme_pubr ()+ scale_color_npg ()+
facet_grid (.~ tissue, scales = "free" )
```
## Penetrators
```{r}
Penetrators <- virus_counts |>
dplyr:: select (condition, tissue, group, percent_of_penetrating_virions)
lm_penetrators <- gamlss:: gamlss ((percent_of_penetrating_virions/ 100 ) ~ condition* tissue,
# beta inflated something something
family = gamlss.dist:: BEINF (),
data = Penetrators)
plot (check_distribution (lm_penetrators))
# Check on this:
# binned_residuals(lm_penetrators, residuals = "response")
# check_model(lm_penetrators)
t <- pairs (emmeans (lm_penetrators, ~ condition))
s <- pairs (emmeans (lm_penetrators, ~ tissue))
t.s <- pairs (emmeans (lm_penetrators, ~ tissue | condition))
s.t <- pairs (emmeans (lm_penetrators, ~ condition | tissue))
rbind (t.s,s.t,adjust= "FDR" )
qqnorm (resid (lm_penetrators))
qqline (resid (lm_penetrators))
hist (resid (lm_penetrators))
plot (fitted (lm_penetrators),resid (lm_penetrators))
abline (h= 0 )
# maybe see higher of pct of penetrating virions in uncircumcised?
virus_counts%>%
ggplot (aes (x= condition,y= percent_of_penetrating_virions,color= condition))+
geom_boxplot (outlier.shape = NA )+
geom_point (position = position_jitterdodge ())+
theme_pubr ()+ scale_color_npg ()+ facet_grid (.~ tissue,scales = "free" )
mean_penetrators <- virus_counts %>%
group_by (condition, tissue, donor) %>%
summarise (mean_percent_of_penetrating_virions = mean (percent_of_penetrating_virions))
```
## Depth
analysis of how far the virions penetrated
```{r}
Depth<- virus_counts[,c (1 : 4 ,9 : 59 )]
Depth<- Depth%>%
dplyr:: rename ("x9" = "penetration_depths_um" ) %>%
pivot_longer (x9: x59,
names_to = "particle" ,
values_to = "penetration_depth" ) |>
dplyr:: select (! particle) |>
drop_na (penetration_depth) |>
mutate (penetration_depth = as.numeric (as.character (penetration_depth)))
mean_depth <- Depth %>%
group_by (condition, tissue, donor) %>%
summarise (mean_depth = mean (penetration_depth))
Depth%>%
ggplot (aes (x= condition,y= penetration_depth,color= tissue))+
geom_boxplot (outlier.shape = NA )+
geom_point (position = position_jitterdodge ())+
theme_pubr ()+ scale_color_npg ()+
facet_grid (.~ tissue,scales = "free" )+ scale_y_log10 ()
mean_depth%>%
ggplot (aes (x= condition,y= mean_depth,color= tissue))+
geom_boxplot (outlier.shape = NA )+
geom_point (position = position_jitterdodge ())+
theme_pubr ()+ scale_color_npg ()+
facet_grid (.~ tissue,scales = "free" )
fl<- fitdist (mean_depth$ mean_depth, "lnorm" )
fn<- fitdist (mean_depth$ mean_depth, "norm" )
fg<- fitdist (mean_depth$ mean_depth, "gamma" )
plot.legend <- c ("normal" , "gamma" , "lognorm" )
denscomp (list (fn,fg,fl), legendtext = plot.legend)
qqcomp (list (fn,fg,fl), legendtext = plot.legend)
cdfcomp (list (fn,fg,fl), legendtext = plot.legend)
ppcomp (list (fn,fg,fl), legendtext = plot.legend)
gofstat (list (fn,fg,fl))
lm_mean_depth <- lme4:: lmer (log10 (mean_depth) ~ condition* tissue + (1 | donor), data = mean_depth)
plot (check_distribution (lm_mean_depth))
binned_residuals (lm_mean_depth)
check_model (lm_mean_depth)
t <- pairs (emmeans (lm_mean_depth, ~ condition))
s <- pairs (emmeans (lm_mean_depth, ~ tissue))
t.s <- pairs (emmeans (lm_mean_depth, ~ tissue | condition))
s.t <- pairs (emmeans (lm_mean_depth, ~ condition | tissue))
rbind (t.s,s.t,adjust= "FDR" )
qqnorm (resid (lm_mean_depth))
qqline (resid (lm_mean_depth))
hist (resid (lm_mean_depth))
plot (fitted (lm_mean_depth),resid (lm_mean_depth))
abline (h= 0 )
```